from sgmllib import SGMLParser
import urllib

class URLLister(SGMLParser):
    def reset(self):
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self,attrs):
        href = [(k,v) for k,v in attrs if k == 'href' and v.startswith('h')]
        if href:
            self.urls.extend(href)

if __name__ == '__main__':
    # Download the page.  The finally clause guarantees the connection is
    # closed even when read() raises part-way through.
    usock = urllib.urlopen('http://cn.bing.com')
    try:
        page = usock.read()
    finally:
        usock.close()

    # Parse the markup; close() is called after feed() so the parser can
    # finish processing whatever input it still holds.
    parser = URLLister()
    parser.feed(page)
    parser.close()

    # Each entry is an ('href', value) tuple collected by URLLister.start_a.
    for url in parser.urls:
        print(url)
